En los últimos años se ha desarrollado notablemente la producción y comercialización de cerveza artesanal, convirtiéndose en una bebida muy popular. Generalmente, al visitar un establecimiento de venta de cerveza artesanal, la carta presenta la misma con el nombre y dos parámetros: el IBU (International Bittering Units), que describe cuán amarga es una cerveza, y el ABV (Alcohol By Volume), que va desde cero (sin alcohol) hasta 1 (alcohol puro). En este práctico exploraremos un dataset mediante métodos de clustering para evaluar si solo con los valores de IBU y ABV es posible distinguir entre estilos de cerveza y así poder decidirnos por cuál comprar. Utilizamos el dataset de Kaggle “Craft Beer” (https://www.kaggle.com/nickhould/craft-cans/data). El mismo contiene una lista de 2410 cervezas y 510 fabricantes de cervezas en los Estados Unidos.
#if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, skimr, GGally, plotly, viridis, caret, randomForest, e1071, rpart, xgboost, h2o, corrplot, rpart.plot, corrgram, lightgbm)
## Installing package into 'C:/Users/marcelo.cena/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## Warning: package 'lightgbm' is not available (for R version 3.5.0)
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/3.5:
## no fue posible abrir la URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/3.5/PACKAGES'
## Bioconductor version 3.7 (BiocInstaller 1.30.0), ?biocLite for help
## Warning in p_install(package, character.only = TRUE, ...):
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'lightgbm'
## Warning in pacman::p_load(tidyverse, skimr, GGally, plotly, viridis, caret, : Failed to install/load:
## lightgbm
# Load the red-wine quality data from a CSV located next to this script;
# header = TRUE means the first row supplies the column names.
vinotinto <- read.csv("./winequality-red.csv", header = TRUE)

# Preview the first rows to check that the columns were parsed as expected.
head(vinotinto)
## fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free_sulfur_dioxide total_sulfur_dioxide density pH sulphates alcohol
## 1 11 34 0.9978 3.51 0.56 9.4
## 2 25 67 0.9968 3.20 0.68 9.8
## 3 15 54 0.9970 3.26 0.65 9.8
## 4 17 60 0.9980 3.16 0.58 9.8
## 5 11 34 0.9978 3.51 0.56 9.4
## 6 13 40 0.9978 3.51 0.56 9.4
## quality
## 1 5
## 2 5
## 3 5
## 4 6
## 5 5
## 6 5
# Per-variable summary statistics of the wine data, formatted with kable().
kable(skim(vinotinto))
## Skim summary statistics
## n obs: 1599
## n variables: 12
##
## Variable type: integer
##
## variable missing complete n mean sd p0 p25 p50 p75 p100 hist
## --------- -------- --------- ----- ----- ----- --- ---- ---- ---- ----- ---------
## quality 0 1599 1599 5.64 0.81 3 5 6 6 8 ▁▁▁▇▇▁▂▁
##
## Variable type: numeric
##
## variable missing complete n mean sd p0 p25 p50 p75 p100 hist
## --------------------- -------- --------- ----- ------ ------- ------ ----- ------ ----- ----- ---------
## alcohol 0 1599 1599 10.42 1.07 8.4 9.5 10.2 11.1 14.9 ▂▇▅▃▂▁▁▁
## chlorides 0 1599 1599 0.087 0.047 0.012 0.07 0.079 0.09 0.61 ▇▃▁▁▁▁▁▁
## citric_acid 0 1599 1599 0.27 0.19 0 0.09 0.26 0.42 1 ▇▅▅▆▂▁▁▁
## density 0 1599 1599 1 0.0019 0.99 1 1 1 1 ▁▁▃▇▇▂▁▁
## fixed_acidity 0 1599 1599 8.32 1.74 4.6 7.1 7.9 9.2 15.9 ▁▇▇▅▂▁▁▁
## free_sulfur_dioxide 0 1599 1599 15.87 10.46 1 7 14 21 72 ▇▇▅▂▁▁▁▁
## pH 0 1599 1599 3.31 0.15 2.74 3.21 3.31 3.4 4.01 ▁▁▅▇▅▁▁▁
## residual_sugar 0 1599 1599 2.54 1.41 0.9 1.9 2.2 2.6 15.5 ▇▂▁▁▁▁▁▁
## sulphates 0 1599 1599 0.66 0.17 0.33 0.55 0.62 0.73 2 ▂▇▂▁▁▁▁▁
## total_sulfur_dioxide 0 1599 1599 46.47 32.9 6 22 38 62 289 ▇▅▂▁▁▁▁▁
## volatile_acidity 0 1599 1599 0.53 0.18 0.12 0.39 0.52 0.64 1.58 ▂▇▇▃▁▁▁▁
Veamos las correlaciones que existen entre las variables.
# Mixed correlation plot of the pairwise correlations between all columns,
# drawing ellipses in the upper triangle.
corrplot.mixed(
  cor(vinotinto),
  upper = "ellipse",
  tl.cex = .8,
  tl.pos = "lt",
  number.cex = .8
)
# Pairwise scatter plots coloured by quality (converted to a factor).
# `sulphates` and `chlorides` are dropped first; columns 1:9 of the remaining
# data are plotted, with points below the diagonal and nothing above it.
ggpairs(
  select(
    mutate(vinotinto, quality = as.factor(quality)),
    -c(sulphates, chlorides)
  ),
  mapping = aes(color = quality, alpha = 0.4),
  columns = 1:9,
  upper = list(continuous = "blank"),
  lower = list(continuous = "points"),
  axisLabels = "none",
  switch = "both"
)
# Standardise (z-score) every column except the `quality` label.
# The label is excluded by name instead of by a hard-coded position, so the
# step keeps working if the column order of `vinotinto` changes.
scale_cols <- setdiff(names(vinotinto), "quality")
vinotinto_n_zscore1 <- vinotinto
vinotinto_n_zscore1[scale_cols] <- lapply(
  vinotinto[scale_cols],
  # scale() returns a one-column matrix; as.numeric() keeps plain numeric columns.
  function(column) as.numeric(scale(column))
)
# Both names are kept because later code refers to either of them.
vinotinto_n_zscore <- as.data.frame(vinotinto_n_zscore1)
head(vinotinto_n_zscore)
## fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
## 1 -0.5281944 0.9615758 -1.391037 -0.45307667 -0.24363047
## 2 -0.2984541 1.9668271 -1.391037 0.04340257 0.22380518
## 3 -0.2984541 1.2966596 -1.185699 -0.16937425 0.09632273
## 4 1.6543385 -1.3840105 1.483689 -0.45307667 -0.26487754
## 5 -0.5281944 0.9615758 -1.391037 -0.45307667 -0.24363047
## 6 -0.5281944 0.7381867 -1.391037 -0.52400227 -0.26487754
## free_sulfur_dioxide total_sulfur_dioxide density pH
## 1 -0.46604672 -0.3790141 0.55809987 1.2882399
## 2 0.87236532 0.6241680 0.02825193 -0.7197081
## 3 -0.08364328 0.2289750 0.13422152 -0.3310730
## 4 0.10755844 0.4113718 0.66406945 -0.9787982
## 5 -0.46604672 -0.3790141 0.55809987 1.2882399
## 6 -0.27484500 -0.1966174 0.55809987 1.2882399
## sulphates alcohol quality
## 1 -0.57902538 -0.9599458 5
## 2 0.12891007 -0.5845942 5
## 3 -0.04807379 -0.5845942 5
## 4 -0.46103614 -0.5845942 6
## 5 -0.57902538 -0.9599458 5
## 6 -0.57902538 -0.9599458 5
# Interactive 3D scatter of alcohol, volatile acidity and sulphates, with
# markers coloured by `quality` and a custom hover label per point.
vinotinto %>%
  plot_ly(
    x = ~alcohol, y = ~volatile_acidity, z = ~sulphates,
    color = ~quality, colors = viridis(3),
    hoverinfo = 'text',
    text = ~paste('Calidad:', quality,
                  '<br>Alcohol:', alcohol,
                  '<br>Acidez volatil:', volatile_acidity,
                  '<br>Sulfatos:', sulphates)
  ) %>%
  add_markers(opacity = 0.8) %>%
  layout(
    title = "3D Calidad del vino",
    # Paper-relative text annotation without an arrow; presumably serves as
    # the title of the colour scale -- confirm in the rendered figure.
    annotations = list(yref = 'paper', xref = "paper", y = 1.05, x = 1.1,
                       text = "quality", showarrow = FALSE),
    scene = list(xaxis = list(title = 'Alcohol'),
                 yaxis = list(title = 'Acidez volatil'),
                 zaxis = list(title = 'Sulfatos'))
  )
# Interactive 3D scatter of alcohol, pH and citric acid, with markers
# coloured by `quality` and a custom hover label per point.
vinotinto %>%
  plot_ly(
    x = ~alcohol, y = ~pH, z = ~citric_acid,
    color = ~quality, colors = viridis(3),
    hoverinfo = 'text',
    text = ~paste('Calidad:', quality,
                  '<br>Alcohol:', alcohol,
                  '<br>PH:', pH,
                  '<br>Acido Citrico:', citric_acid)
  ) %>%
  add_markers(opacity = 0.8) %>%
  layout(
    title = "3D Calidad del Vino",
    # Paper-relative text annotation without an arrow; presumably serves as
    # the title of the colour scale -- confirm in the rendered figure.
    annotations = list(yref = 'paper', xref = "paper", y = 1.05, x = 1.1,
                       text = "quality", showarrow = FALSE),
    scene = list(xaxis = list(title = 'Alcohol'),
                 yaxis = list(title = 'PH'),
                 zaxis = list(title = 'Acido Citrico'))
  )
# Interactive 3D scatter of total sulfur dioxide, fixed acidity and residual
# sugar, with markers coloured by `quality` and a custom hover label per point.
vinotinto %>%
  plot_ly(
    x = ~total_sulfur_dioxide, y = ~fixed_acidity, z = ~residual_sugar,
    color = ~quality, colors = viridis(3),
    hoverinfo = 'text',
    text = ~paste('Calidad:', quality,
                  '<br>Dioxido de sulfuro total:', total_sulfur_dioxide,
                  '<br>Acidez:', fixed_acidity,
                  '<br>Azucar residual:', residual_sugar)
  ) %>%
  add_markers(opacity = 0.8) %>%
  layout(
    title = "3D Calidad del Vino",
    # Paper-relative text annotation without an arrow; presumably serves as
    # the title of the colour scale -- confirm in the rendered figure.
    annotations = list(yref = 'paper', xref = "paper", y = 1.05, x = 1.1,
                       text = "quality", showarrow = FALSE),
    scene = list(xaxis = list(title = 'Dioxido de sulfuro total'),
                 yaxis = list(title = 'Acidez'),
                 zaxis = list(title = 'Azucar Residual'))
  )
library(mclust)
## Package 'mclust' version 5.4
## Type 'citation("mclust")' for citing this R package in publications.
##
## Attaching package: 'mclust'
## The following object is masked from 'package:purrr':
##
## map
library(cluster)
library(factoextra)
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
# Choose the number of k-means clusters on columns 2:8 of the z-scored data
# using three criteria: elbow (within-cluster sum of squares), average
# silhouette width and the gap statistic (500 bootstrap samples).
#
# kmeans() defaults to iter.max = 10, which triggered repeated
# "did not converge in 10 iterations" warnings, so the criteria were being
# computed on non-converged fits. A higher cap is forwarded to kmeans()
# through `...`, the same way `nstart` already is.
set.seed(97)

# Elbow method
fviz_nbclust(vinotinto_n_zscore1[, 2:8], kmeans,
             nstart = 30, iter.max = 50, method = "wss")

# Average silhouette method
fviz_nbclust(vinotinto_n_zscore1[, 2:8], kmeans,
             nstart = 30, iter.max = 50, method = "silhouette")

# Gap statistic method
fviz_nbclust(vinotinto_n_zscore1[, 2:8], kmeans,
             nstart = 30, iter.max = 50, method = "gap_stat", nboot = 500)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations